{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:541: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint8 = np.dtype([(\"qint8\", np.int8, 1)])\n",
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:542: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint8 = np.dtype([(\"quint8\", np.uint8, 1)])\n",
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:543: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint16 = np.dtype([(\"qint16\", np.int16, 1)])\n",
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:544: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_quint16 = np.dtype([(\"quint16\", np.uint16, 1)])\n",
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:545: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  _np_qint32 = np.dtype([(\"qint32\", np.int32, 1)])\n",
      "/home/alexschneidman/anaconda3/envs/ope/lib/python3.7/site-packages/tensorboard/compat/tensorflow_stub/dtypes.py:550: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.\n",
      "  np_resource = np.dtype([(\"resource\", np.ubyte, 1)])\n"
     ]
    }
   ],
   "source": [
    "# Imports\n",
    "\n",
    "import argparse\n",
    "import json\n",
    "import logging\n",
    "import os\n",
    "import random\n",
    "import sys\n",
    "from dataclasses import dataclass\n",
    "from typing import Tuple\n",
    "from multiprocessing import Pool\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import torch\n",
    "from torch import Tensor\n",
    "\n",
    "from reagent.ope.estimators.estimator import Estimator, EstimatorResult\n",
    "from reagent.ope.estimators.contextual_bandits_estimators import (\n",
    "    Action,\n",
    "    ActionDistribution,\n",
    "    ActionRewards,\n",
    "    BanditsEstimatorInput,\n",
    "    BanditsModel,\n",
    "    DMEstimator,\n",
    "    DoublyRobustEstimator,\n",
    "    IPSEstimator,\n",
    "    LogSample,\n",
    "    SwitchEstimator,\n",
    "    SwitchDREstimator\n",
    ")\n",
    "from reagent.ope.estimators.types import ActionSpace, Policy, Trainer\n",
    "from reagent.ope.trainers.linear_trainers import (\n",
    "    LogisticRegressionTrainer,\n",
    "    SGDClassifierTrainer,\n",
    "    TrainingData,\n",
    "    DecisionTreeTrainer,\n",
    "    LinearTrainer,\n",
    "    NNTrainer\n",
    ")\n",
    "from reagent.ope.test.multiclass_bandits import (\n",
    "    MultiClassDataRow,\n",
    "    UCIMultiClassDataset,\n",
    "    MultiClassContext,\n",
    "    MultiClassModel,\n",
    "    MultiClassPolicy,\n",
    "    evaluate_all\n",
    ")\n",
    "from reagent.ope.utils import RunningAverage, Clamper\n",
    "\n",
    "import matplotlib\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Configuration Settings\n",
    "\n",
    "Edit the experiments list with the names of UCI datasets given in reagent/test/data to produce results for each dataset. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration\n",
    "\n",
    "DEFAULT_ITERATIONS = 500\n",
    "TEST_ROOT_PATH = '..'\n",
    "UCI_DATASET_CONFIGS = os.path.join(TEST_ROOT_PATH, 'configs')\n",
    "MAX_METRIC_NAME_LENGTH = 20\n",
    "experiments = [\"ecoli\", \"letter_recog\", \"pendigits\", \"optdigits\", \"satimage\"]\n",
    "\n",
    "experiment_params = []\n",
    "for exp in experiments:\n",
    "    with open(os.path.join(UCI_DATASET_CONFIGS, exp + '_config.json'), \"r\") as f:\n",
    "        params = json.load(f)\n",
    "        if \"dataset\" in params:\n",
    "            if \"file\" in params[\"dataset\"]:\n",
    "                params[\"dataset\"][\"file\"] = os.path.join(TEST_ROOT_PATH, params[\"dataset\"][\"file\"])\n",
    "        experiment_params.append({\"name\": exp, \"params\": params})     "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Run an experiment\n",
    "\n",
    "We load the given dataset, and create trainers (which will be used for generating the policies for the logger and target). To try different trainers, modify the `log_trainer` and `tgt_trainer` variables with different `LinearTrainer`s. \n",
    "\n",
    "Note that DM's performance is highly dependent on the reward model. To try different reward models, modify the trainer passed into `DMEstimator` and `DoublyRobustEstimator` with different `LinearTrainer`s. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def load_dataset(params):\n",
    "        return UCIMultiClassDataset(params[\"dataset\"])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Experiment(s)\n",
    "def run_experiment(dataset): \n",
    "    random.seed(1234)\n",
    "    np.random.seed(1234)\n",
    "    torch.random.manual_seed(1234)\n",
    "\n",
    "    log_trainer = LogisticRegressionTrainer()\n",
    "    log_epsilon = 0.1\n",
    "    tgt_trainer = SGDClassifierTrainer()\n",
    "    tgt_epsilon = 0.1\n",
    "    experiments = [\n",
    "        (\n",
    "            (\n",
    "                SwitchEstimator(LogisticRegressionTrainer(), rmax=1.0),\n",
    "                SwitchDREstimator(LogisticRegressionTrainer(), rmax=1.0),\n",
    "                DMEstimator(LogisticRegressionTrainer()),\n",
    "                IPSEstimator(),\n",
    "                DoublyRobustEstimator(LogisticRegressionTrainer()),\n",
    "            ),\n",
    "            1000,\n",
    "        )\n",
    "        for _ in range(100)\n",
    "    ]\n",
    "    results = evaluate_all(\n",
    "        experiments, dataset, log_trainer, log_epsilon, tgt_trainer, tgt_epsilon, 0\n",
    "    )\n",
    "    return results\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result Generation\n",
    "\n",
    "For each UCI dataset, we generate a logging and target policy, create a simulated dataset using the logging policy, and evaluate the target policy using DM, IPS, and DR. The bias, rmse, and variance against the ground truth is plotted for each dataset. \n",
    "\n",
    "\n",
    "For the settings with the logging policy trained with a `LogisticRegressionTrainer`, the target policy with a `SGDClassifierTrainer`, and the reward model for DM and DR trained with a `LogisticRegressionTrainer`, a sample result gives:\n",
    "\n",
    "\n",
    "![alt text](img/bias.png \"Bias\")![alt text](img/variance.png \"Bias\")![alt text](img/rmse.png \"Bias\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "datasets = []\n",
    "for params in experiment_params:\n",
    "    datasets.append(load_dataset(params['params']))\n",
    "    \n",
    "labels = []\n",
    "\n",
    "bias_result_mapping = {}\n",
    "var_result_mapping = {}\n",
    "rmse_result_mapping = {}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Running experiment ecoli\n",
      "SwitchEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.6409800000000001 tgt_reward[0.681124829351902] gt_reward[0.68099], diffs: tgt-gt[samples=100, rmse=0.018109501162651483, bias=0.00013482935190194833, variance=0.000331248336773678] tgt-log[samples=100, rmse=0.04037231561536808, bias=0.04014482935190198, variance=1.850156005410315e-05]\n",
      "SwitchDREstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.6409800000000001 tgt_reward[0.6827095168828964] gt_reward[0.68099], diffs: tgt-gt[samples=100, rmse=0.017383191095996103, bias=0.0017195168828963636, variance=0.0003022410044134877] tgt-log[samples=100, rmse=0.0420135565438498, bias=0.04172951688289641, variance=2.402662038720069e-05]\n",
      "DMEstimator(trainer(logistic_regression,device(None)) rewards: log_reward0.6409800000000001 tgt_reward[0.5767845714374259] gt_reward[0.68099], diffs: tgt-gt[samples=100, rmse=0.11443995653971288, bias=-0.10420542856257413, variance=0.002260335667577406] tgt-log[samples=100, rmse=0.07716353279482184, bias=-0.06419542856257408, variance=0.001851674489944438]\n",
      "IPSEstimator(weight_clamper(Clamper(-inf,inf)),weighted(False),device(None)) rewards: log_reward0.6409800000000001 tgt_reward[0.6823636658191] gt_reward[0.68099], diffs: tgt-gt[samples=100, rmse=0.01882527740452088, bias=0.0013736658190999795, variance=0.0003560647591662634] tgt-log[samples=100, rmse=0.04206649112797752, bias=0.04138366581910002, variance=5.755745373057047e-05]\n",
      "DoublyRobustEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.6409800000000001 tgt_reward[0.6817410593008515] gt_reward[0.68099], diffs: tgt-gt[samples=100, rmse=0.017341787591402112, bias=0.0007510593008514166, variance=0.00030320556241607353] tgt-log[samples=100, rmse=0.04114618984290583, bias=0.04076105930085145, variance=3.186361945547262e-05]\n",
      "Running experiment letter_recog\n",
      "SwitchEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.57266 tgt_reward[0.37569143682718276] gt_reward[0.37457999999999997], diffs: tgt-gt[samples=100, rmse=0.017854429347174034, bias=0.0011114368271827103, variance=0.0003207528843357692] tgt-log[samples=100, rmse=0.1970799629435354, bias=-0.19696856317281733, variance=4.434031864764246e-05]\n",
      "SwitchDREstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.57266 tgt_reward[0.39171935588121415] gt_reward[0.37457999999999997], diffs: tgt-gt[samples=100, rmse=0.03417343925054195, bias=0.017139355881214083, variance=0.0008828953840278517] tgt-log[samples=100, rmse=0.18214352227137348, bias=-0.18094064411878594, variance=0.00044115758717292716]\n",
      "DMEstimator(trainer(logistic_regression,device(None)) rewards: log_reward0.57266 tgt_reward[0.5635916976264305] gt_reward[0.37457999999999997], diffs: tgt-gt[samples=100, rmse=0.1904276330665789, bias=0.1890116976264305, variance=0.0005426884805196267] tgt-log[samples=100, rmse=0.015867718348869037, bias=-0.009068302373569526, variance=0.0001712630077379278]\n",
      "IPSEstimator(weight_clamper(Clamper(-inf,inf)),weighted(False),device(None)) rewards: log_reward0.57266 tgt_reward[0.375998918900146] gt_reward[0.37457999999999997], diffs: tgt-gt[samples=100, rmse=0.01824032850522253, bias=0.0014189189001458874, variance=0.00033403661932650735] tgt-log[samples=100, rmse=0.19681556707994985, bias=-0.1966610810998541, variance=6.140063195842609e-05]\n",
      "DoublyRobustEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.57266 tgt_reward[0.37928051044302813] gt_reward[0.37457999999999997], diffs: tgt-gt[samples=100, rmse=0.03347790030442483, bias=0.0047005104430280905, variance=0.001109772737745446] tgt-log[samples=100, rmse=0.19478690644998073, bias=-0.1933794895569719, variance=0.0005518302454934725]\n",
      "Running experiment pendigits\n",
      "SwitchEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.8269399999999999 tgt_reward[0.7546563205122948] gt_reward[0.75397], diffs: tgt-gt[samples=100, rmse=0.015537334667333605, bias=0.0006863205122947092, variance=0.00024337144719104064] tgt-log[samples=100, rmse=0.07248482839355978, bias=-0.07228367948770521, variance=2.941416864900794e-05]\n",
      "SwitchDREstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.8269399999999999 tgt_reward[0.778800046145916] gt_reward[0.75397], diffs: tgt-gt[samples=100, rmse=0.03441568664250387, bias=0.024830046145915927, variance=0.0005736447428956617] tgt-log[samples=100, rmse=0.05062061183046669, bias=-0.048139953854084, variance=0.00024746584345196695]\n",
      "DMEstimator(trainer(logistic_regression,device(None)) rewards: log_reward0.8269399999999999 tgt_reward[0.8175922172337771] gt_reward[0.75397], diffs: tgt-gt[samples=100, rmse=0.0656767363156423, bias=0.06362221723377698, variance=0.0002683304720530327] tgt-log[samples=100, rmse=0.01131064331883703, bias=-0.009347782766222957, variance=4.095920165803824e-05]\n",
      "IPSEstimator(weight_clamper(Clamper(-inf,inf)),weighted(False),device(None)) rewards: log_reward0.8269399999999999 tgt_reward[0.7546501099407741] gt_reward[0.75397], diffs: tgt-gt[samples=100, rmse=0.015550695689640714, bias=0.0006801099407739219, variance=0.00024379958272754629] tgt-log[samples=100, rmse=0.07249068782194369, bias=-0.072289890059226, variance=2.9365268811629092e-05]\n",
      "DoublyRobustEstimator(trainer(logistic_regression),weight_clamper(Clamper(-inf,inf)),device(None)) rewards: log_reward0.8269399999999999 tgt_reward[0.7560407283385557] gt_reward[0.75397], diffs: tgt-gt[samples=100, rmse=0.027855609742539835, bias=0.0020707283385555174, variance=0.0007794414932086698] tgt-log[samples=100, rmse=0.0741953563333444, bias=-0.0708992716614444, variance=0.0004830749285946023]\n",
      "Running experiment optdigits\n"
     ]
    }
   ],
   "source": [
    "for dataset, params in zip(datasets, experiment_params):\n",
    "    print(\"Running experiment \" + params[\"name\"])\n",
    "    if params[\"name\"] in labels:\n",
    "        continue\n",
    "    exp_results = run_experiment(dataset)\n",
    "    labels.append(params[\"name\"])\n",
    "\n",
    "    for estimator_name, result in exp_results.items():\n",
    "        _, _, _, tgt_gt, _, _ = result.report()\n",
    "        if not estimator_name in bias_result_mapping:\n",
    "            bias_result_mapping[estimator_name] = []\n",
    "        if not estimator_name in var_result_mapping:\n",
    "            var_result_mapping[estimator_name] = []\n",
    "        if not estimator_name in rmse_result_mapping:\n",
    "            rmse_result_mapping[estimator_name] = []\n",
    "\n",
    "        bias_result_mapping[estimator_name].append(tgt_gt.bias.cpu().numpy())\n",
    "        var_result_mapping[estimator_name].append(tgt_gt.variance.cpu().numpy())\n",
    "        rmse_result_mapping[estimator_name].append(tgt_gt.rmse.cpu().numpy())\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Generate Bar Charts, a la https://arxiv.org/pdf/1511.03722.pdf\n",
    "\n",
    "def create_and_show_chart(labels, results, title):\n",
    "    # Width of each bar\n",
    "    width = 0.1\n",
    "\n",
    "    metrics = list(results.keys())\n",
    "    \n",
    "    # Set position of bar on X axis\n",
    "    barpos = [np.arange(len(results[metrics[0]]))]\n",
    "    for m in range(len(metrics)-1):\n",
    "        barpos.append([x + width for x in barpos[-1]])\n",
    "        \n",
    "    fig, ax = plt.subplots()\n",
    "    for metric, barpositions in zip(metrics, barpos):\n",
    "        ax.bar(barpositions, results[metric], width, label=metric[:MAX_METRIC_NAME_LENGTH])\n",
    "\n",
    "    ax.set_ylabel(title)\n",
    "    plt.xticks([r + width for r in range(len(labels))], labels)\n",
    "    ax.set_xticklabels(labels)\n",
    "    ax.legend()\n",
    "\n",
    "    fig.tight_layout()\n",
    "\n",
    "    plt.show()\n",
    "\n",
    "create_and_show_chart(labels, bias_result_mapping, 'Bias')\n",
    "create_and_show_chart(labels, rmse_result_mapping, 'RMSE')\n",
    "create_and_show_chart(labels, var_result_mapping, 'Variance')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "celltoolbar": "Attachments",
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
